{
 "cells": [
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "# 多因子数据探索"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 假设检验"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 4,
   "metadata": {},
   "outputs": [],
   "source": [
    "import numpy as np\n",
    "import scipy.stats as ss"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 5,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([ 0.54583651,  0.12684918,  0.4429434 , -0.42516543,  0.49901631,\n",
       "        0.12287419, -0.36590323, -0.97259822, -0.30700347,  0.06935185,\n",
       "        0.79107645,  1.6017795 ,  0.36359728, -0.54685468,  0.23501521,\n",
       "       -0.17216385,  1.15114342, -0.26190627, -1.3299745 ,  0.79158252])"
      ]
     },
     "execution_count": 5,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 生成正态分布\n",
    "norm_dist = ss.norm.rvs(size=20)\n",
    "norm_dist"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 6,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "NormaltestResult(statistic=0.23319240531955351, pvalue=0.8899444778325278)"
      ]
     },
     "execution_count": 6,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 基于偏度和峰度的正态检验\n",
    "ss.normaltest(norm_dist)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 7,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(126.08080808080808, 2.9521414005078985e-29, 1, array([[55., 55.],\n",
       "        [45., 45.]]))"
      ]
     },
     "execution_count": 7,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 卡方检验\n",
    "ss.chi2_contingency([[15,95],[85,5]])"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 8,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ttest_indResult(statistic=-0.06670275173565719, pvalue=0.9472923595793121)"
      ]
     },
     "execution_count": 8,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 独立t分布检验，检验两组值的均值是否有比较大的差异性\n",
    "ss.ttest_ind(ss.norm.rvs(size=10),ss.norm.rvs(size=20))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 9,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ttest_indResult(statistic=0.6690022317186542, pvalue=0.5040119878623686)"
      ]
     },
     "execution_count": 9,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "ss.ttest_ind(ss.norm.rvs(size=100),ss.norm.rvs(size=200))"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 10,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "Ttest_indResult(statistic=-0.037019439471572595, pvalue=0.9713765928038277)"
      ]
     },
     "execution_count": 10,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "a = [19,22,38,77.6,52]\n",
    "b = [19.5,23,39,77.9,52]\n",
    "ss.ttest_ind(a,b)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 11,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "F_onewayResult(statistic=17.619417475728156, pvalue=0.0002687153079821641)"
      ]
     },
     "execution_count": 11,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# f检验，方差检验\n",
    "ss.f_oneway([49,50,39,40,43],[28,32,30,26,34],[38,40,45,42,48])"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### QQ图"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "除了通过假设检验的方法，还可通过QQ图对比一个分布和已知的分布是否一致\n",
    "\n",
    "已知一个分布可以找到分位数，对应的分位数找到正态分布的分位数，横轴是正态分布分位数的值，纵轴是已知分布的值，得到一个曲线或散点图，这个图正对着x轴和y轴的角平分线，和平分线重合，就是符合分布的"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 12,
   "metadata": {},
   "outputs": [],
   "source": [
    "from statsmodels.graphics.api import qqplot\n",
    "from matplotlib import pyplot as plt\n",
    "%matplotlib inline"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 13,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "image/png": "iVBORw0KGgoAAAANSUhEUgAAAYAAAAEGCAYAAABsLkJ6AAAABHNCSVQICAgIfAhkiAAAAAlwSFlzAAALEgAACxIB0t1+/AAAADh0RVh0U29mdHdhcmUAbWF0cGxvdGxpYiB2ZXJzaW9uMy4xLjAsIGh0dHA6Ly9tYXRwbG90bGliLm9yZy+17YcXAAAbX0lEQVR4nO3dfZBkdX3v8fdnh8VlLiphdit6hZkhQIhoEJIRQW/uFSQJRoNFEmPIisimskWIuqRMBVOb+IRbasj1JoIUGYPKQ8dIQkwMPiAQFCXxYYEFlmy0UHcXEnLdBUHIeoXd+d4/zmmnt7cfTj+cPqf7fF5VU9N9Zuj+Thf7+57z/f3O96eIwMzMqmdF0QGYmVkxnADMzCrKCcDMrKKcAMzMKsoJwMysog4qOoBerF69Oubn54sOw8xsrNx55527I2JN8/GxSgDz8/Ns3ry56DDMzMaKpB2tjhdWApK0StLXJN0j6X5J7yoqFjOzKiryCuCHwOkR8aSklcCXJX02Ir5SYExmZpVRWAKI5BbkJ9OnK9Mv35ZsZjYiha4CkjQlaQvwXeDmiPhqi99ZL2mzpM27du0afZBmZhOq0AQQEfsi4kTgCOBkSS9s8TuLEbEQEQtr1hwwiW1mZn0qxX0AEfEY8AXgzIJDMTMbK7UazM/DihXJ91ot+39b5CqgNZIOSx8fApwB/FtR8ZiZjZtaDdavhx07ICL5vn599iRQ5BXAc4HbJN0LfJ1kDuDGAuMxMxsrGzfCnj37H9uzJzmeRZGrgO4FTirq/c3Mxt3Onb0db1aKOQAzM+vd7Gxvx5s5AZiZjalNm2B6ev9j09PJ8SycAMzMxtTatbC4CHNzICXfFxeT41mMVTM4MzPb39q12Qf8Zr4CMDOrKCcAM7OKcgIwM6soJwAzs4pyAjAzK8AgPXyGxauAzMxGrN7Dp97God7DB/pf0dMPXwGYmY3YoD18hsUJwMxsxAbt4TMsTgBmZiM2aA+fYXECMDMbsUF7+AyLE4CZ2YgN2sNnWLwKyMysAIP08BkWXwGYmVWUE4CZWUU5AZiZVZQTgJlZRTkBmJlVlBOAmVlFOQGYmVWUE4CZWUU5AZiZVZQTgJlZRTkBmJlVlBOAmdkQlGGLx14VlgAkHSnpNknbJN0vaUNRsZiZDaK+xeOOHRCxvMVj2ZNAkVcAe4G3RsTzgVOA35V0fIHxmJn1pSxbPPaqsAQQEQ9HxF3p4yeAbcDziorHzKxfZdnisVelmAOQNA+cBHy1xc/WS9osafOuXbtGHZqZWVdl2eKxV4UnAEmHAjcAF0XE95t/HhGLEbEQEQtr1qwZfYBmZl2UZYvHXhWaACStJBn8axHxd0XGYmbWr7Js8dirwraElCTgKmBbRHygqDjMzIahDFs89qrIK4CXAecCp0vakn79UoHxmJlVSmFXABHxZUBFvb+ZWdUVPglsZlZ243iXbxaFXQGYmY2D+l2+9Ru96nf5wvjV/Jv5CsDMKqPdmXynM/xxvcs3C18BmFkltDuTv+MOuPrq9mf443qXbxa+AjCzSmh3Jr+42PkMf1zv8s3CCcDMJlJzWWfHjta/t29f6+P1M/xxvcs3CycAM5s4rdozq82i86mp1sfrZ/jjepdvFk4AZjZxWpV7Ig5MAtPTSaLodoa/di1s3w5LS8n3SRj8wQnAzCZQuwnaiAPP5K+4YnLP8LvxKiAzmzizs61r/nNzyRl8s3Hs4zMMvgIws4kzyRO3w+QEYGZjr3nFD1S3rNMLl4DMbKy1u8FrcbF1uceW+QrAzMbaJLdqyJsTgJmNlaw3eE1Cq4a8uQRkZmOjVblHSpZ3NpuEVg158xWAmY2NXm7w8oqf7pwAzKzUGks+7co9rW7w8oqf7lwCMrPSai75tNPuBi/rrOsVgKSjJT0jffxySW+RdFj+oZlZFTWe8Z93XvfB3+We/mUpAd0A7JN0DHAVcBTwV7lGZWaV1NzFs12rZnC5ZxiylICWImKvpLOBP4uIyyTdnXdgZlY9rSZ5W3HJZziyXAE8Lekc4DzgxvTYyvxCMrMqaCz1rF6dfLWb5G3kks/wZEkA5wOnApsi4juSjgKuyzcsM5s0zQP+unXLpZ5HHkm+2pmacsknD11LQBHxr5IuBmbT598B3pd3YGY2OZpX83Qa7JtNT3vQz0uWVUC/DGwBPpc+P1HSp/IOzMwmR9bafjOf8ecrSwnoncDJwGMAEbGFZCWQmVlH9bJPltp+s/pErwf//GRJAHsj4vGmYy06b5iZLQ/6Epx7bn+Dvyd6RyNLAtgq6TeBKUnHSroM+OdhvLmkj0j6rqStw3g9MytW4zp+aN2krW7lSpiZSRLFzMzyY5d9RidLAngz8ALgh8DHge8DFw3p/T8GnDmk1zKzgvWyjv+jH4Xdu2FpKflef+yyz+hkWQW0B9iYfg1VRNwuaX7Yr2tmo1GrJYP+zp1w+OHZVvf4Jq7yaJsAJP0jHWr9EXFWLhGZWSnVB/sdO5J1+fv27d+LP8vg79p+uXS6AvjTkUXRgaT1wHqAWe/wYFaI5nX89R49nWr8dfUkMTeXDP4u75RH2wQQEV8cZSDtRMQisAiwsLDg1UdmI1arJV05OzVm6+Taaz3ol1WnEtD1EfHrku6jRSkoIk7INTIzK0xjuafdlotZzM158C+zTiWgDen3V+f15pI+DrwcWC3pIeAdEXFVXu9nZt01l3v6Hfxd7y+/TiWgh9OHF0bExY0/k/R+4OID/6veRMQ5g76GmQ1PP+We+hXCzEzy/NFHkw3ZXe8vvyz3Afx8i2OvHHYgZlas+pl/L4P/3FxS44/wWv5x1GkO4HeAC4GfkHRvw4+eCdyRd2BmNjq9nPm7O+fk6DQH8FfAZ4H3Am9rOP5ERDyaa1RmlrteJnq9lHMydZoDeBx4HDhH0hTw4+nvHyrp0IjYOaIYzWyIajXYsGH/G7c6Df5TU3D11R70J1HXVhCS3kTSEvr/Akvp4QC8DNRsDAyypNPlnsmWZVP4i4DjIqKHPXzMrAwGWdI5NeXBf9JlWQX0IEkpyMzGTL87cU1Pu+xTBVmuAL4NfEHSp0laQgMQER/ILSoz61mrZm298ERv9WRJADvTr4PTLzMrkVaTur0O/jMz8Od/7kG/arLsB/CuUQRiZr1rrvH3ygN/tWVZBbQG+AOSXcFW1Y9HxOk5xmVmXQzSpdNlHoNsJaAa8AmSpnAXAOcBu/IMysz2N8zunN6Ny+qyrAKaSTt0Ph0RX4yIdcApOcdlZqleNlrvxN05rVmWBPB0+v1hSa+SdBJwRI4xmVmDfpZyrkj/ZU9NJd/n5rym3w6UpQT0HknPBt4KXAY8C/i9XKMysx/Z2UPTFbdtsF5kWQV0Y/rwceC0fMMxs2azs8vln07ctsF6lWUV0EdpvSXkulwiMrMfqdXgySe7/56Xc1o/spSAbmx4vAo4G/iPfMIxM2h9c1crHvhtEFlKQDc0Pk/38b0lt4jMKq7bzV1eymnDkmUVULNjgdlhB2JmiW6rfnqZFDbrJMscwBMkcwBKv/8nQ9gQ3swOVKt1n/Cd9emXDUmWEtAzRxGIWdXVSz+d+GYuG6aOCUDSIcBa4Pj00GbgbyPiqbwDM6uaDRs6l3484WvD1nYOQNJPA9uAnwO2AzuAXwTukHSYpPeMJEKzCqjVOq/4ue462L3bg78NV6crgA8Cvx0RNzcelHQGsBW4P8/AzCZdY4O3TubmPPBbPjolgOc2D/4AEXGLpKdJ7gcwsx5kXd/fyDV/y0unZaArJD2j+aCkVSSdQfvcgsJs8tVqMD+ftG4+6KDk+4oV8PrX9zb4z8z47N/y0ykBXAPcIGm+fiB9fD1wbZ5BmY2zCy+Ec89dLu3UN2zptY3z9HQy6WuWl7YloIh4j6Q3AbdLmk4P/xfwpxFx2UiiMxsztRpceWX/Pfvrpqbc2M3y1/FO4Ii4PCJmgaOAoyJibpiDv6QzJX1D0gOS3jas1zUrQn2LxkEH/+lpt3S20cjUCiIinoiIJ4b5xpKmgA8BryS5z+AcScd3/q/MyqdWg9Wrk/p+P/vzwvIGLt64xUapn15Aw3Iy8EBEfDu9seyvgdcUGI9ZTxoH/l4mdhvNzCRr/PftS64ctm/34G+jU2QCeB7wYMPzh9Jj+5G0XtJmSZt37fJe9FasxtU9vQ78jdszXnddMuD75i4rUtcEIGla0h9L+nD6/FhJrx7Ce6vFsVYbzyxGxEJELKxZs2YIb2vWu8az/Sy7c9VNTS0P9nv3+izfyiXLFcBHgR8Cp6bPHwKG0QbiIeDIhudH4I1mrITqyzp7LfN4MtfKLksCODoi/gR4GiAifkDrs/defR04VtJRkg4GfgP41BBe12xo+l3WOTPjyVwrvyxbQj6VdgUNAElHk1wRDCQi9qb3GdwETAEfiQj3F7LS6GdZpzt22jjJkgDeAXwOOFJSDXgZ8MZhvHlEfAb4zDBey2wQ/fToaSTBBRfAFVcMNy6zPGXZEOZmSXcBp5CUfjZExO7cIzMbgUEG/hUrYGkpWdWzaZPP+m38tE0Akn6m6dDD6fdZSbMRcVd+YZnlr9vm6+34bN8mRacrgP/d4WcBnD7kWMxyl7UHfztTU17ZY5OjUzO400YZiFne+j3jr5ue9soemyxd5wDS/v8XAv+D5Mz/S8CVEfH/co7NbKg2bux/8PfqHptEWe4DuAZ4AXAZcDlJ4zbvB2Bjp5eyT2NzNu/Ha5MqyzLQ4yLiRQ3Pb5N0T14BmQ2qsc4/NZU0WlOGWxdd37eqyXIFcLekU+pPJL0EuCO/kMz6V6/z97obl9s2WBVluQJ4CfAGSTvT57PANkn3ARERJ+QWnVkP6nfu9tqT3+v4raqyJIAzc4/CbED1M/9+Bv/t23MJyaz0stwJvEPSj5F07jyo4bhvBLNCDbqmf3o6OfM3q6osy0AvIen98y2W+/X7RjArzKB9e8DLOs0gWwno10laQj+VdzBm3fR7M1d9NZDr/WbLsiSArcBhwHdzjsWsq15u5vKdu2adZUkA7yVZCrqVhn0AIuKs3KIya2Pnzu6/A8kZvwd/s86yJICrgfcD9wFL+YZj1tnsbPdJX5/5m2WTJQHsjogP5h6JWQabNrWeA3BvfrPeZUkAd0p6L8l+vY0lIC8DtUIccshyAvBqHrP+ZUkAJ6XfT2k45mWgNlLtln7+4AfFxGM2CbLcCOZ9AaxQnZZ+7tmTrAzyFYBZ77JcASDpVSQtoVfVj0XEu/MKyqqpVRfPmRn43veS+n47WVcGmdn+stwJfCUwDZwG/CXwa8DXco7LKqb5LL/e0yfL3b6zs/nFZTbJsrSDfmlEvAH4XkS8CziVpC+Q2VDUu3j2s1uX+/mY9S9LAqhPs+2R9N+Bp4Gj8gvJqqTfLp6QlIe83t+sf1nmAG6UdBhwKXAXyQqgD+calVVGP/v0eucus+HIsgrokvThDZJuBFZFxOP5hmVVUKv13srZd/maDU/bEpCkF0t6TsPzNwDXA5dIOnwUwdnkqpd+upmZSb6k5C5fD/5mw9NpDuAvgKcAJP1P4H3ANcDjwGL+odkk61T6mZ6G665L9vHdvTv5WlpKdu7y4G82PJ0SwFREPJo+fh2wGBE3RMQfA8cM8qaSXivpfklLkhYGeS0bP91KPz7LNxuNjglAUn2O4BXAPzX8LNMNZB1sBX4FuH3A17Ex0630Mzfnwd9sVDoN5B8HvihpN8lS0C8BSDqGpAzUt4jYlr7WIC9jYyTL/r1e0282Wm0TQERsknQr8Fzg8xFR3w94BfDmUQQHIGk9sB5g1rd8jp1e9u916cdstDqWciLiKy2OfTPLC0u6BXhOix9tjIh/yBYeRMQi6aTzwsJCdPl1K4leN2536cds9Aat5bcVEWfk9dpWvHpJZ+dOODxdFPzII8tN3KRkFU8WLv2YFSO3BGCTq7lxW+NZfr2lQ9bB3zt4mRUnSy+goZN0tqSHSBrLfVrSTUXEYb2p1WB+Hl7/+v4atzWqr/X32n6z4hRyBRARnwQ+WcR7W2utSjqPPrp/eaeXsk4n3sbRrBxcArKOJZ3Gx4MO/h74zcqlkBKQlUs/HTmzqN/mMTeXlHt27/bgb1YmTgA28JaK9YZtkKwCgmTQv/ba5KrBdX6zcnICqLD6pG6/pZ36RG69YVsE7N3rQd9sXDgBVFS97t9rP/7Gso7v3DUbb04AFVI/41+xovMevI09+Jv78busYzY5vAqoIppX+rTbg1dKyjlmNvl8BTDBsp7xN3K/PbPqcAKYII0D/urVsG5dUuOPaH/G38g9ecyqxSWgCdHpZq5OpqaS7RZnZ92Tx6xqfAUwJhrP7ufnk+f9lHgaTU/D1Vd7v12zqvIVwBhoPrvfsQPOPz+ZsH3qqeRYlhIP+IzfzJY5AZRQY2O22Vl48skDz+6ffrr3152e9tp9M1vmBFAyrc72+7VyJTzrWUlXT5/xm1kzJ4CSGbQxm0s8ZpaVJ4EL0mpSF7I3Zlu5Eg4+eP9jntQ1s174CqAArco869cnj2dnW5d9Zmbg0EOX5wXq6/Ub5wp8xm9mvVAMY4unEVlYWIjNmzcXHcbA5udbD/L1/XEbkwN48tbMBiPpzohYaD7uElAB2pV5du5MBvnFxSQZ1BuwefA3szw4AeSoXZ2/Xb+d+vG1a5Mavmv5ZpYnzwHkpFOdv12Zx314zGyUnABy0mo55549yfHt25d/xxO4ZlYUTwLnZMWK1lstSklpx8xsVDwJPGLd6vxmZkVzAsjJpk1JXb+R6/xmViZOADnxck4zKztPAudo7VoP+GZWXr4C6FO7Nf5mZuOikCsASZcCvww8BXwLOD8iHisiln50WuPvM34zGxdFXQHcDLwwIk4Avgn8YUFx9KXTGn8zs3FRSAKIiM9HxN706VeAI4qIo1+devmYmY2LMswBrAM+2+6HktZL2ixp865du0YYVnte429mkyC3BCDpFklbW3y9puF3NgJ7gbZTqBGxGBELEbGwZs2avMLtidf4m9kkyG0SOCLO6PRzSecBrwZeEePUj4LliV738jGzcVbUKqAzgYuB/xURA+yAWxyv8TezcVfUHMDlwDOBmyVtkXRlQXGYmVVWIVcAEXFMEe9rZmbLyrAKyMzMCuAEkHJrBzOrGjeDw60dzKyafAWAWzuYWTU5AeDWDmZWTU4AuLWDmVWTEwBu7WBm1eQEgLdvNLNqGusEMMylm2vXwvbtsLSUfPfgb2aTbmyXgXrpppnZYMb2CsBLN83MBjO2CcBLN83MBjO2CcBLN83MBjO2CcBLN83MBjO2CcBLN83MBjO2q4DAu3KZmQ1ibK8AzMxsME4AZmYV5QRgZlZRTgBmZhXlBGBmVlGKiKJjyEzSLmDHiN92NbB7xO9Zdv5MDuTP5ED+TFor4nOZi4g1zQfHKgEUQdLmiFgoOo4y8WdyIH8mB/Jn0lqZPheXgMzMKsoJwMysopwAulssOoAS8mdyIH8mB/Jn0lppPhfPAZiZVZSvAMzMKsoJwMysopwAMpB0qaR/k3SvpE9KOqzomIom6bWS7pe0JKkUS9qKIulMSd+Q9ICktxUdT9EkfUTSdyVtLTqWspB0pKTbJG1L/91sKDomcALI6mbghRFxAvBN4A8LjqcMtgK/AtxedCBFkjQFfAh4JXA8cI6k44uNqnAfA84sOoiS2Qu8NSKeD5wC/G4Z/j9xAsggIj4fEXvTp18BjigynjKIiG0R8Y2i4yiBk4EHIuLbEfEU8NfAawqOqVARcTvwaNFxlElEPBwRd6WPnwC2Ac8rNiongH6sAz5bdBBWGs8DHmx4/hAl+Idt5SVpHjgJ+GqxkYz5jmDDJOkW4DktfrQxIv4h/Z2NJJdytVHGVpQsn4mhFse8ttpaknQocANwUUR8v+h4nABSEXFGp59LOg94NfCKqMjNE90+EwOSM/4jG54fAfxHQbFYiUlaSTL41yLi74qOB1wCykTSmcDFwFkRsafoeKxUvg4cK+koSQcDvwF8quCYrGQkCbgK2BYRHyg6njongGwuB54J3Cxpi6Qriw6oaJLOlvQQcCrwaUk3FR1TEdLFAW8CbiKZ2Ls+Iu4vNqpiSfo48C/AcZIekvRbRcdUAi8DzgVOT8eQLZJ+qeig3ArCzKyifAVgZlZRTgBmZhXlBGBmVlFOAGZmFeUEYGZWUU4ANnKSZhqWwv2npH9PHz8m6V9HHMuJjcvxJJ3Vb0dPSdslrW5x/NmSrpH0rfSrJunHBom7zfu3/VskvVPS7w/7PW28OQHYyEXEIxFxYkScCFwJ/J/08YnA0rDfT1KnO95PBH40aEbEpyLifUMO4Srg2xFxdEQcDTxA0jFz2Ebxt9gEcQKwspmS9OG0Z/rnJR0CIOloSZ+TdKekL0n6qfT4nKRb070abpU0mx7/mKQPSLoNeL+k/5b2qf+6pLslvSa9c/fdwOvSK5DXSXqjpMvT1/jxdP+He9Kvl6bH/z6N435J6zv9MZKOAX4WuKTh8LuBF0k6TtLLJd3Y8PuXS3pj+vjtabxbJS2md5Mi6QuS3i/pa5K+Kennuv0tTTG1+yxfm77XPZIq3ea7KpwArGyOBT4UES8AHgN+NT2+CLw5In4W+H3givT45cA16V4NNeCDDa/1k8AZEfFWYCPwTxHxYuA04FJgJfB24BPpFcknmmL5IPDFiHgR8DNA/Q7fdWkcC8BbJM10+HuOB7ZExL76gfTx3cDzu3wWl0fEiyPihcAhJL2o6g6KiJOBi4B3pK2oO/0tjdp9lm8HfjH9e8/qEptNADeDs7L5TkRsSR/fCcynHRRfCvxNehIM8Iz0+6kkG9MAXAv8ScNr/U3DwPsLwFkNdfBVwGyXWE4H3gA/GrQfT4+/RdLZ6eMjSZLWI21eQ7TuDtqqi2iz0yT9ATANHE6SgP4x/Vm9mdidwHyG10retPNneQfwMUnXN7y+TTAnACubHzY83kdy5rsCeCydJ+imcbD9r4bHAn61eRMbSS/pJThJLwfOAE6NiD2SvkCSTNq5HzhJ0oqIWEpfYwVwAnAXSRJqvBJflf7OKpIz84WIeFDSO5vep/457aO3f8dtP8uIuCD9PF4FbJF0YkS0S2w2AVwCstJL+6Z/R9JrIemsKOlF6Y//maQDJ8Ba4MttXuYm4M0NdfST0uNPkDT6a+VW4HfS35+S9Czg2cD30sH/p0i29+sU+wMk5Z4/ajj8R8CtEbET2AEcL+kZkp4NvCL9nfpgvzs9a/+1Tu+T4W+px9P2s5R0dER8NSLeDuxm/zbXNoGcAGxcrAV+S9I9JGfV9W0X3wKcL+lekm6L7TbbvoSk5n+vks3K65Oyt5EMwFskva7pv9lAUoa5j6TU8gLgc8BB6ftdQrJFaDfrSFpGPyBpF0nSuAAgIh4ErgfuJZnDuDs9/hjwYeA+4O9J2k530+lvadTus7xU0n3p53M7cE+G97Qx5m6gZiMk6TjgMySTsJ8pOh6rNicAM7OKcgnIzKyinADMzCrKCcDMrKKcAMzMKsoJwMysopwAzMwq6v8DtPPfybkIK94AAAAASUVORK5CYII=\n",
      "text/plain": [
       "<Figure size 432x288 with 1 Axes>"
      ]
     },
     "metadata": {
      "needs_background": "light"
     },
     "output_type": "display_data"
    }
   ],
   "source": [
    "# QQ图默认对比已知分布和正态分布的一致性\n",
    "plt.show(qqplot(ss.norm.rvs(size=100)))"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 相关系数"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 14,
   "metadata": {},
   "outputs": [],
   "source": [
    "import pandas as pd"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 15,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.9333729600465923"
      ]
     },
     "execution_count": 15,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "s1 = pd.Series([0.1,0.2,1.1,2.4,1.3,0.3,0.5])\n",
    "s2 = pd.Series([0.5,0.4,1.2,2.5,1.1,0.7,0.1])\n",
    "# 皮尔逊相关系数\n",
    "s1.corr(s2)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 16,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "0.7142857142857144"
      ]
     },
     "execution_count": 16,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# spearman相关系数\n",
    "s1.corr(s2,method='spearman')"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 17,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.2</td>\n",
       "      <td>1.1</td>\n",
       "      <td>2.4</td>\n",
       "      <td>1.3</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.4</td>\n",
       "      <td>1.2</td>\n",
       "      <td>2.5</td>\n",
       "      <td>1.1</td>\n",
       "      <td>0.7</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2    3    4    5    6\n",
       "0  0.1  0.2  1.1  2.4  1.3  0.3  0.5\n",
       "1  0.5  0.4  1.2  2.5  1.1  0.7  0.1"
      ]
     },
     "execution_count": 17,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df = pd.DataFrame([s1,s2])\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 18,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "      <th>2</th>\n",
       "      <th>3</th>\n",
       "      <th>4</th>\n",
       "      <th>5</th>\n",
       "      <th>6</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "      <td>-1.0</td>\n",
       "      <td>1.0</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1    2    3    4    5    6\n",
       "0  1.0  1.0  1.0  1.0 -1.0  1.0 -1.0\n",
       "1  1.0  1.0  1.0  1.0 -1.0  1.0 -1.0\n",
       "2  1.0  1.0  1.0  1.0 -1.0  1.0 -1.0\n",
       "3  1.0  1.0  1.0  1.0 -1.0  1.0 -1.0\n",
       "4 -1.0 -1.0 -1.0 -1.0  1.0 -1.0  1.0\n",
       "5  1.0  1.0  1.0  1.0 -1.0  1.0 -1.0\n",
       "6 -1.0 -1.0 -1.0 -1.0  1.0 -1.0  1.0"
      ]
     },
     "execution_count": 18,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# df是针对列进行相关系数计算，所以需要转化\n",
    "df.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 20,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>0.1</td>\n",
       "      <td>0.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.2</td>\n",
       "      <td>0.4</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>2</td>\n",
       "      <td>1.1</td>\n",
       "      <td>1.2</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>3</td>\n",
       "      <td>2.4</td>\n",
       "      <td>2.5</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>4</td>\n",
       "      <td>1.3</td>\n",
       "      <td>1.1</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>5</td>\n",
       "      <td>0.3</td>\n",
       "      <td>0.7</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>6</td>\n",
       "      <td>0.5</td>\n",
       "      <td>0.1</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "     0    1\n",
       "0  0.1  0.5\n",
       "1  0.2  0.4\n",
       "2  1.1  1.2\n",
       "3  2.4  2.5\n",
       "4  1.3  1.1\n",
       "5  0.3  0.7\n",
       "6  0.5  0.1"
      ]
     },
     "execution_count": 20,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 转化成np.array(),求转置，再计算相关系数\n",
    "df = pd.DataFrame(np.array([s1,s2]).T)\n",
    "df"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 21,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.933373</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.933373</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1\n",
       "0  1.000000  0.933373\n",
       "1  0.933373  1.000000"
      ]
     },
     "execution_count": 21,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.corr()"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 22,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/html": [
       "<div>\n",
       "<style scoped>\n",
       "    .dataframe tbody tr th:only-of-type {\n",
       "        vertical-align: middle;\n",
       "    }\n",
       "\n",
       "    .dataframe tbody tr th {\n",
       "        vertical-align: top;\n",
       "    }\n",
       "\n",
       "    .dataframe thead th {\n",
       "        text-align: right;\n",
       "    }\n",
       "</style>\n",
       "<table border=\"1\" class=\"dataframe\">\n",
       "  <thead>\n",
       "    <tr style=\"text-align: right;\">\n",
       "      <th></th>\n",
       "      <th>0</th>\n",
       "      <th>1</th>\n",
       "    </tr>\n",
       "  </thead>\n",
       "  <tbody>\n",
       "    <tr>\n",
       "      <td>0</td>\n",
       "      <td>1.000000</td>\n",
       "      <td>0.714286</td>\n",
       "    </tr>\n",
       "    <tr>\n",
       "      <td>1</td>\n",
       "      <td>0.714286</td>\n",
       "      <td>1.000000</td>\n",
       "    </tr>\n",
       "  </tbody>\n",
       "</table>\n",
       "</div>"
      ],
      "text/plain": [
       "          0         1\n",
       "0  1.000000  0.714286\n",
       "1  0.714286  1.000000"
      ]
     },
     "execution_count": 22,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "df.corr(method='spearman')"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### 线性回归 y = wx + b"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 23,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[0.],\n",
       "       [1.],\n",
       "       [2.],\n",
       "       [3.],\n",
       "       [4.],\n",
       "       [5.],\n",
       "       [6.],\n",
       "       [7.],\n",
       "       [8.],\n",
       "       [9.]])"
      ]
     },
     "execution_count": 23,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "x = np.arange(10).astype(np.float).reshape((10,1))\n",
    "x"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 24,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 4.28641058],\n",
       "       [ 7.94062331],\n",
       "       [10.38510338],\n",
       "       [13.28458232],\n",
       "       [16.82820008],\n",
       "       [19.32953052],\n",
       "       [22.31209976],\n",
       "       [25.5777032 ],\n",
       "       [28.10107974],\n",
       "       [31.76226065]])"
      ]
     },
     "execution_count": 24,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "y = x * 3 + 4 + np.random.random((10,1))\n",
    "y"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 25,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[ 4.50932133],\n",
       "       [ 7.50297422],\n",
       "       [10.49662712],\n",
       "       [13.49028001],\n",
       "       [16.48393291],\n",
       "       [19.4775858 ],\n",
       "       [22.4712387 ],\n",
       "       [25.46489159],\n",
       "       [28.45854448],\n",
       "       [31.45219738]])"
      ]
     },
     "execution_count": 25,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "from sklearn.linear_model import LinearRegression\n",
    "reg = LinearRegression()\n",
    "# 拟合\n",
    "res = reg.fit(x,y)\n",
    "# 求估计值\n",
    "y_pred = res.predict(x)\n",
    "y_pred"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 26,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2.99365289]])"
      ]
     },
     "execution_count": 26,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 参数w\n",
    "reg.coef_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 27,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([4.50932133])"
      ]
     },
     "execution_count": 27,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 截距b\n",
    "reg.intercept_"
   ]
  },
  {
   "cell_type": "markdown",
   "metadata": {},
   "source": [
    "### PCA主成分分析"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 28,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[2.5, 2.4],\n",
       "       [0.5, 0.7],\n",
       "       [2.2, 2.9],\n",
       "       [1.9, 2.2],\n",
       "       [3.1, 3. ],\n",
       "       [2.3, 2.7],\n",
       "       [2. , 1.6],\n",
       "       [1. , 1.1],\n",
       "       [1.5, 1.6],\n",
       "       [1.1, 0.9]])"
      ]
     },
     "execution_count": 28,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "data = np.array([np.array([2.5,0.5,2.2,1.9,3.1,2.3,2,1,1.5,1.1]),\n",
    "                 np.array([2.4,0.7,2.9,2.2,3,2.7,1.6,1.1,1.6,0.9])]).T\n",
    "data"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 29,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([0.96318131])"
      ]
     },
     "execution_count": 29,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# sklearn中的PCA方法，使用的是奇异值分解SVD的方法\n",
    "from sklearn.decomposition import PCA\n",
    "# 降维-降成1维\n",
    "lower_dim = PCA(n_components=1)\n",
    "lower_dim.fit(data)\n",
    "# 降维后维度的属性\n",
    "lower_dim.explained_variance_ratio_"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 30,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "array([[-0.82797019],\n",
       "       [ 1.77758033],\n",
       "       [-0.99219749],\n",
       "       [-0.27421042],\n",
       "       [-1.67580142],\n",
       "       [-0.9129491 ],\n",
       "       [ 0.09910944],\n",
       "       [ 1.14457216],\n",
       "       [ 0.43804614],\n",
       "       [ 1.22382056]])"
      ]
     },
     "execution_count": 30,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 直接得到转化后的数值\n",
    "lower_dim.fit_transform(data)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": 31,
   "metadata": {},
   "outputs": [
    {
     "data": {
      "text/plain": [
       "(array([[-0.82797019],\n",
       "        [ 1.77758033],\n",
       "        [-0.99219749],\n",
       "        [-0.27421042],\n",
       "        [-1.67580142],\n",
       "        [-0.9129491 ],\n",
       "        [ 0.09910944],\n",
       "        [ 1.14457216],\n",
       "        [ 0.43804614],\n",
       "        [ 1.22382056]]), array([0.0490834 +0.j, 1.28402771+0.j]))"
      ]
     },
     "execution_count": 31,
     "metadata": {},
     "output_type": "execute_result"
    }
   ],
   "source": [
    "# 手写实现PCA方法，求协方差矩阵，求特征值和特征向量，得到转化后的值\n",
    "def myPCA(data,n_components=1000000):\n",
    "    # 对每列求均值\n",
    "    mean_vals = np.mean(data,axis=0)\n",
    "    # 每个数减去均值\n",
    "    mid = data - mean_vals\n",
    "    # 得到协方差矩阵\n",
    "    cov_mat = np.cov(mid,rowvar=False)\n",
    "    # 导入线性计算包\n",
    "    from scipy import linalg\n",
    "    # 求特征值和特征向量\n",
    "    eig_vals,eig_vects = linalg.eig(np.mat(cov_mat))\n",
    "    # 对特征值排序，记录索引\n",
    "    eig_val_index = np.argsort(eig_vals)\n",
    "    # 取k个最大的特征值\n",
    "    eig_val_index = eig_val_index[:-(n_components+1):-1]\n",
    "    eig_vects = eig_vects[:,eig_val_index]\n",
    "    # 点乘求降维后的矩阵\n",
    "    low_dim_mat = np.dot(mid,eig_vects)\n",
    "    return low_dim_mat,eig_vals\n",
    "\n",
    "myPCA(data,n_components=1)"
   ]
  },
  {
   "cell_type": "code",
   "execution_count": null,
   "metadata": {},
   "outputs": [],
   "source": []
  }
 ],
 "metadata": {
  "kernelspec": {
   "display_name": "Python 3",
   "language": "python",
   "name": "python3"
  },
  "language_info": {
   "codemirror_mode": {
    "name": "ipython",
    "version": 3
   },
   "file_extension": ".py",
   "mimetype": "text/x-python",
   "name": "python",
   "nbconvert_exporter": "python",
   "pygments_lexer": "ipython3",
   "version": "3.6.8"
  }
 },
 "nbformat": 4,
 "nbformat_minor": 4
}
